https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
local repo version of data-transformation.pdf
29 November, 2020
https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
local repo version of data-transformation.pdf
| Task | Function | Package |
|---|---|---|
| Sorting | arrange() | dplyr |
| Adding columns | mutate() | dplyr |
| Transformations | mutate() | dplyr |
| Re-ordering factor levels | factor(,levels=) | base |
| Re-labelling | factor(,lab=) | base |
| | recode() | dplyr |
| Re-naming columns | rename(,replace=) | dplyr |
| Filtering/Subsetting | indexing | base |
| ~ columns | select(,...) | dplyr |
| | pull(,...) | dplyr |
| ~ rows | filter(,...) | dplyr |
| Unique combinations | distinct() | dplyr |
| Reshaping data | pivot_longer(), pivot_wider() | tidyr |
| Split/combine columns | separate(), unite() | tidyr |
| Aggregating | group_by() summarise() | dplyr |
| | group_by() count() | dplyr |
| Merging/joining | *_join() | dplyr |
%>%
data %>%
select(...) %>%
group_by(...) %>%
summarise(...)
load(file = "../data/manipulationDatasets.RData")
data.1
| Between | Plot | Cond | Time | Temp | LAT | LONG |
|---|---|---|---|---|---|---|
| A1 | P1 | H | 1 | 25.78 | 14.95 | 144.7 |
| A1 | P1 | M | 2 | 24.33 | 16.17 | 142.1 |
| A1 | P1 | L | 3 | 24.96 | 15.53 | 144 |
| A1 | P2 | H | 4 | 25.73 | 14.96 | 145.8 |
| A1 | P2 | M | 1 | 25.05 | 15.61 | 147.7 |
| A1 | P2 | L | 2 | 24.88 | 15.74 | 144.8 |
| A2 | P3 | H | 3 | 20.98 | 19.71 | 145.8 |
| A2 | P3 | M | 4 | 21.39 | 19.27 | 144.9 |
| A2 | P3 | L | 1 | 20.34 | 20.17 | 142.2 |
| A2 | P4 | H | 2 | 20.49 | 19.61 | 144.2 |
| A2 | P4 | M | 3 | 21.52 | 19.11 | 144.2 |
| A2 | P4 | L | 4 | 22.18 | 18.31 | 144.9 |
# Attach the data-manipulation packages used throughout.
library(dplyr)
library(tidyr)
# OR better still: the tidyverse bundle, which also attaches forcats
# (needed for the fct_*() calls used further on).
library(tidyverse)
head(data.1)
## # A tibble: 6 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 M 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145.
# OR, equivalently, pipe the data into head()
data.1 %>% head()
## # A tibble: 6 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 M 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145.
# OR (with %>%, a function needing no further arguments can be given
# without parentheses)
data.1 %>% head
## # A tibble: 6 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 M 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145.
summary(data.1)
## Between Plot Cond Time Temp LAT LONG ## A1:6 P1:3 H:4 Min. :1.00 Min. :20.34 Min. :14.95 Min. :142.1 ## A2:6 P2:3 L:4 1st Qu.:1.75 1st Qu.:21.29 1st Qu.:15.59 1st Qu.:144.1 ## P3:3 M:4 Median :2.50 Median :23.25 Median :17.24 Median :144.7 ## P4:3 Mean :2.50 Mean :23.14 Mean :17.43 Mean :144.6 ## 3rd Qu.:3.25 3rd Qu.:24.99 3rd Qu.:19.35 3rd Qu.:145.1 ## Max. :4.00 Max. :25.78 Max. :20.17 Max. :147.7
summary(data.1)
## Between Plot Cond Time Temp LAT LONG ## A1:6 P1:3 H:4 Min. :1.00 Min. :20.34 Min. :14.95 Min. :142.1 ## A2:6 P2:3 L:4 1st Qu.:1.75 1st Qu.:21.29 1st Qu.:15.59 1st Qu.:144.1 ## P3:3 M:4 Median :2.50 Median :23.25 Median :17.24 Median :144.7 ## P4:3 Mean :2.50 Mean :23.14 Mean :17.43 Mean :144.6 ## 3rd Qu.:3.25 3rd Qu.:24.99 3rd Qu.:19.35 3rd Qu.:145.1 ## Max. :4.00 Max. :25.78 Max. :20.17 Max. :147.7
data.1 %>% summary()
## Between Plot Cond Time Temp LAT LONG ## A1:6 P1:3 H:4 Min. :1.00 Min. :20.34 Min. :14.95 Min. :142.1 ## A2:6 P2:3 L:4 1st Qu.:1.75 1st Qu.:21.29 1st Qu.:15.59 1st Qu.:144.1 ## P3:3 M:4 Median :2.50 Median :23.25 Median :17.24 Median :144.7 ## P4:3 Mean :2.50 Mean :23.14 Mean :17.43 Mean :144.6 ## 3rd Qu.:3.25 3rd Qu.:24.99 3rd Qu.:19.35 3rd Qu.:145.1 ## Max. :4.00 Max. :25.78 Max. :20.17 Max. :147.7
data.1 %>% summary
## Between Plot Cond Time Temp LAT LONG ## A1:6 P1:3 H:4 Min. :1.00 Min. :20.34 Min. :14.95 Min. :142.1 ## A2:6 P2:3 L:4 1st Qu.:1.75 1st Qu.:21.29 1st Qu.:15.59 1st Qu.:144.1 ## P3:3 M:4 Median :2.50 Median :23.25 Median :17.24 Median :144.7 ## P4:3 Mean :2.50 Mean :23.14 Mean :17.43 Mean :144.6 ## 3rd Qu.:3.25 3rd Qu.:24.99 3rd Qu.:19.35 3rd Qu.:145.1 ## Max. :4.00 Max. :25.78 Max. :20.17 Max. :147.7
str(data.1)
## tibble [12 × 7] (S3: tbl_df/tbl/data.frame) ## $ Between: Factor w/ 2 levels "A1","A2": 1 1 1 1 1 1 2 2 2 2 ... ## $ Plot : Factor w/ 4 levels "P1","P2","P3",..: 1 1 1 2 2 2 3 3 3 4 ... ## $ Cond : Factor w/ 3 levels "H","L","M": 1 3 2 1 3 2 1 3 2 1 ... ## $ Time : int [1:12] 1 2 3 4 1 2 3 4 1 2 ... ## $ Temp : num [1:12] 25.8 24.3 25 25.7 25.1 ... ## $ LAT : num [1:12] 14.9 16.2 15.5 15 15.6 ... ## $ LONG : num [1:12] 145 142 144 146 148 ...
glimpse(data.1)
## Rows: 12 ## Columns: 7 ## $ Between <fct> A1, A1, A1, A1, A1, A1, A2, A2, A2, A2, A2, A2 ## $ Plot <fct> P1, P1, P1, P2, P2, P2, P3, P3, P3, P4, P4, P4 ## $ Cond <fct> H, M, L, H, M, L, H, M, L, H, M, L ## $ Time <int> 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ## $ Temp <dbl> 25.77507, 24.32564, 24.96428, 25.73127, 25.05280, 24.88189, 20.97769, 2… ## $ LAT <dbl> 14.94992, 16.16537, 15.53248, 14.95750, 15.61145, 15.73689, 19.70611, 1… ## $ LONG <dbl> 144.6884, 142.0585, 144.0437, 145.8359, 147.7174, 144.7944, 145.7753, 1…
data.1 %>% as_tibble
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 M 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145. ## 7 A2 P3 H 3 21.0 19.7 146. ## 8 A2 P3 M 4 21.4 19.3 145. ## 9 A2 P3 L 1 20.3 20.2 142. ## 10 A2 P4 H 2 20.5 19.6 144. ## 11 A2 P4 M 3 21.5 19.1 144. ## 12 A2 P4 L 4 22.2 18.3 145.
arrange()
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Sorting by LAT
data.1 %>% arrange(LAT)
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P2 H 4 25.7 15.0 146. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 M 1 25.1 15.6 148. ## 5 A1 P2 L 2 24.9 15.7 145. ## 6 A1 P1 M 2 24.3 16.2 142. ## 7 A2 P4 L 4 22.2 18.3 145. ## 8 A2 P4 M 3 21.5 19.1 144. ## 9 A2 P3 M 4 21.4 19.3 145. ## 10 A2 P4 H 2 20.5 19.6 144. ## 11 A2 P3 H 3 21.0 19.7 146. ## 12 A2 P3 L 1 20.3 20.2 142.
arrange()
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Sorting by LAT (descending order)
data.1 %>% arrange(-LAT)
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A2 P3 L 1 20.3 20.2 142. ## 2 A2 P3 H 3 21.0 19.7 146. ## 3 A2 P4 H 2 20.5 19.6 144. ## 4 A2 P3 M 4 21.4 19.3 145. ## 5 A2 P4 M 3 21.5 19.1 144. ## 6 A2 P4 L 4 22.2 18.3 145. ## 7 A1 P1 M 2 24.3 16.2 142. ## 8 A1 P2 L 2 24.9 15.7 145. ## 9 A1 P2 M 1 25.1 15.6 148. ## 10 A1 P1 L 3 25.0 15.5 144. ## 11 A1 P2 H 4 25.7 15.0 146. ## 12 A1 P1 H 1 25.8 14.9 145.
arrange()
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Sorting by Cond and then Temp
data.1 %>% arrange(Cond,Temp)
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A2 P4 H 2 20.5 19.6 144. ## 2 A2 P3 H 3 21.0 19.7 146. ## 3 A1 P2 H 4 25.7 15.0 146. ## 4 A1 P1 H 1 25.8 14.9 145. ## 5 A2 P3 L 1 20.3 20.2 142. ## 6 A2 P4 L 4 22.2 18.3 145. ## 7 A1 P2 L 2 24.9 15.7 145. ## 8 A1 P1 L 3 25.0 15.5 144. ## 9 A2 P3 M 4 21.4 19.3 145. ## 10 A2 P4 M 3 21.5 19.1 144. ## 11 A1 P1 M 2 24.3 16.2 142. ## 12 A1 P2 M 1 25.1 15.6 148.
arrange()
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Sort by the sum of Temp and LAT
data.1 %>% arrange(Temp+LAT)
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A2 P4 H 2 20.5 19.6 144. ## 2 A2 P4 L 4 22.2 18.3 145. ## 3 A1 P1 M 2 24.3 16.2 142. ## 4 A1 P1 L 3 25.0 15.5 144. ## 5 A2 P3 L 1 20.3 20.2 142. ## 6 A1 P2 L 2 24.9 15.7 145. ## 7 A2 P4 M 3 21.5 19.1 144. ## 8 A2 P3 M 4 21.4 19.3 145. ## 9 A1 P2 M 1 25.1 15.6 148. ## 10 A2 P3 H 3 21.0 19.7 146. ## 11 A1 P2 H 4 25.7 15.0 146. ## 12 A1 P1 H 1 25.8 14.9 145.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% arrange(Between,Cond)
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P2 H 4 25.7 15.0 146. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 L 2 24.9 15.7 145. ## 5 A1 P1 M 2 24.3 16.2 142. ## 6 A1 P2 M 1 25.1 15.6 148. ## 7 A2 P3 H 3 21.0 19.7 146. ## 8 A2 P4 H 2 20.5 19.6 144. ## 9 A2 P3 L 1 20.3 20.2 142. ## 10 A2 P4 L 4 22.2 18.3 145. ## 11 A2 P3 M 4 21.4 19.3 145. ## 12 A2 P4 M 3 21.5 19.1 144.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% arrange(Cond,Temp/LAT)
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A2 P4 H 2 20.5 19.6 144. ## 2 A2 P3 H 3 21.0 19.7 146. ## 3 A1 P2 H 4 25.7 15.0 146. ## 4 A1 P1 H 1 25.8 14.9 145. ## 5 A2 P3 L 1 20.3 20.2 142. ## 6 A2 P4 L 4 22.2 18.3 145. ## 7 A1 P2 L 2 24.9 15.7 145. ## 8 A1 P1 L 3 25.0 15.5 144. ## 9 A2 P3 M 4 21.4 19.3 145. ## 10 A2 P4 M 3 21.5 19.1 144. ## 11 A1 P1 M 2 24.3 16.2 142. ## 12 A1 P2 M 1 25.1 15.6 148.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% mutate(LL=LAT+LONG)
## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG LL ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. 160. ## 2 A1 P1 M 2 24.3 16.2 142. 158. ## 3 A1 P1 L 3 25.0 15.5 144. 160. ## 4 A1 P2 H 4 25.7 15.0 146. 161. ## 5 A1 P2 M 1 25.1 15.6 148. 163. ## 6 A1 P2 L 2 24.9 15.7 145. 161. ## 7 A2 P3 H 3 21.0 19.7 146. 165. ## 8 A2 P3 M 4 21.4 19.3 145. 164. ## 9 A2 P3 L 1 20.3 20.2 142. 162. ## 10 A2 P4 H 2 20.5 19.6 144. 164. ## 11 A2 P4 M 3 21.5 19.1 144. 163. ## 12 A2 P4 L 4 22.2 18.3 145. 163.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Transformations
data.1 %>% mutate(logTemp=log(Temp))
## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG logTemp ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. 3.25 ## 2 A1 P1 M 2 24.3 16.2 142. 3.19 ## 3 A1 P1 L 3 25.0 15.5 144. 3.22 ## 4 A1 P2 H 4 25.7 15.0 146. 3.25 ## 5 A1 P2 M 1 25.1 15.6 148. 3.22 ## 6 A1 P2 L 2 24.9 15.7 145. 3.21 ## 7 A2 P3 H 3 21.0 19.7 146. 3.04 ## 8 A2 P3 M 4 21.4 19.3 145. 3.06 ## 9 A2 P3 L 1 20.3 20.2 142. 3.01 ## 10 A2 P4 H 2 20.5 19.6 144. 3.02 ## 11 A2 P4 M 3 21.5 19.1 144. 3.07 ## 12 A2 P4 L 4 22.2 18.3 145. 3.10
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Centering
# Centre Temp on its overall mean, keeping the mean as its own column.
data.1 %>%
  mutate(MeanTemp = mean(Temp), cTemp = Temp - MeanTemp)
## OR if just want the centered variable..
# data.1 %>% mutate(cTemp = Temp - mean(Temp))
## # A tibble: 12 x 9 ## Between Plot Cond Time Temp LAT LONG MeanTemp cTemp ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. 23.1 2.64 ## 2 A1 P1 M 2 24.3 16.2 142. 23.1 1.19 ## 3 A1 P1 L 3 25.0 15.5 144. 23.1 1.83 ## 4 A1 P2 H 4 25.7 15.0 146. 23.1 2.60 ## 5 A1 P2 M 1 25.1 15.6 148. 23.1 1.92 ## 6 A1 P2 L 2 24.9 15.7 145. 23.1 1.75 ## 7 A2 P3 H 3 21.0 19.7 146. 23.1 -2.16 ## 8 A2 P3 M 4 21.4 19.3 145. 23.1 -1.74 ## 9 A2 P3 L 1 20.3 20.2 142. 23.1 -2.79 ## 10 A2 P4 H 2 20.5 19.6 144. 23.1 -2.65 ## 11 A2 P4 M 3 21.5 19.1 144. 23.1 -1.62 ## 12 A2 P4 L 4 22.2 18.3 145. 23.1 -0.957
data.1 %>% head(2) %>% as_tibble
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Changing vector types (classes)
data.1 %>% mutate(Time=factor(Time)) %>% as_tibble
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 M 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145. ## 7 A2 P3 H 3 21.0 19.7 146. ## 8 A2 P3 M 4 21.4 19.3 145. ## 9 A2 P3 L 1 20.3 20.2 142. ## 10 A2 P4 H 2 20.5 19.6 144. ## 11 A2 P4 M 3 21.5 19.1 144. ## 12 A2 P4 L 4 22.2 18.3 145.
data.1 %>% head(2) %>% as_tibble
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Changing factor labels
data.1 %>% mutate(Cond=fct_recode(Cond, High='H', Medium='M' )) %>% as_tibble
## # A tibble: 12 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 High 1 25.8 14.9 145. ## 2 A1 P1 Medium 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 High 4 25.7 15.0 146. ## 5 A1 P2 Medium 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145. ## 7 A2 P3 High 3 21.0 19.7 146. ## 8 A2 P3 Medium 4 21.4 19.3 145. ## 9 A2 P3 L 1 20.3 20.2 142. ## 10 A2 P4 High 2 20.5 19.6 144. ## 11 A2 P4 Medium 3 21.5 19.1 144. ## 12 A2 P4 L 4 22.2 18.3 145.
data.1 %>% head(2) %>% as_tibble
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Changing factor levels
data.1 %>% pull(Cond)
## [1] H M L H M L H M L H M L ## Levels: H L M
data.1 %>% mutate(Cond=fct_relevel(Cond, c('L', 'M','H'))) %>%
as_tibble() %>% pull(Cond)
## [1] H M L H M L H M L H M L ## Levels: L M H
data.1 %>% head(2) %>% as_tibble
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Changing factor levels
data.1 %>% pull(Cond)
## [1] H M L H M L H M L H M L ## Levels: H L M
data.1 %>% mutate(Cond=recode_factor(Cond, 'L'='Low', 'M'='Medium')) %>%
as_tibble() %>% pull(Cond)
## [1] H Medium Low H Medium Low H Medium Low H Medium Low ## Levels: Low Medium H
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% mutate(leadTemp=lead(Temp), lagTemp=lag(Temp))
## # A tibble: 12 x 9 ## Between Plot Cond Time Temp LAT LONG leadTemp lagTemp ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. 24.3 NA ## 2 A1 P1 M 2 24.3 16.2 142. 25.0 25.8 ## 3 A1 P1 L 3 25.0 15.5 144. 25.7 24.3 ## 4 A1 P2 H 4 25.7 15.0 146. 25.1 25.0 ## 5 A1 P2 M 1 25.1 15.6 148. 24.9 25.7 ## 6 A1 P2 L 2 24.9 15.7 145. 21.0 25.1 ## 7 A2 P3 H 3 21.0 19.7 146. 21.4 24.9 ## 8 A2 P3 M 4 21.4 19.3 145. 20.3 21.0 ## 9 A2 P3 L 1 20.3 20.2 142. 20.5 21.4 ## 10 A2 P4 H 2 20.5 19.6 144. 21.5 20.3 ## 11 A2 P4 M 3 21.5 19.1 144. 22.2 20.5 ## 12 A2 P4 L 4 22.2 18.3 145. NA 21.5
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Rank orders
data.1 %>% mutate(rankTime=min_rank(Time),
denseRankTime=dense_rank(Time))
## # A tibble: 12 x 9 ## Between Plot Cond Time Temp LAT LONG rankTime denseRankTime ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <int> <int> ## 1 A1 P1 H 1 25.8 14.9 145. 1 1 ## 2 A1 P1 M 2 24.3 16.2 142. 4 2 ## 3 A1 P1 L 3 25.0 15.5 144. 7 3 ## 4 A1 P2 H 4 25.7 15.0 146. 10 4 ## 5 A1 P2 M 1 25.1 15.6 148. 1 1 ## 6 A1 P2 L 2 24.9 15.7 145. 4 2 ## 7 A2 P3 H 3 21.0 19.7 146. 7 3 ## 8 A2 P3 M 4 21.4 19.3 145. 10 4 ## 9 A2 P3 L 1 20.3 20.2 142. 1 1 ## 10 A2 P4 H 2 20.5 19.6 144. 4 2 ## 11 A2 P4 M 3 21.5 19.1 144. 7 3 ## 12 A2 P4 L 4 22.2 18.3 145. 10 4
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Rank orders
data.1 %>% mutate(rowTemp=row_number(Temp), rowTime=row_number(Time),
rankTime=min_rank(Time))
## # A tibble: 12 x 10 ## Between Plot Cond Time Temp LAT LONG rowTemp rowTime rankTime ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <int> <int> <int> ## 1 A1 P1 H 1 25.8 14.9 145. 12 1 1 ## 2 A1 P1 M 2 24.3 16.2 142. 7 4 4 ## 3 A1 P1 L 3 25.0 15.5 144. 9 7 7 ## 4 A1 P2 H 4 25.7 15.0 146. 11 10 10 ## 5 A1 P2 M 1 25.1 15.6 148. 10 2 1 ## 6 A1 P2 L 2 24.9 15.7 145. 8 5 4 ## 7 A2 P3 H 3 21.0 19.7 146. 3 8 7 ## 8 A2 P3 M 4 21.4 19.3 145. 4 11 10 ## 9 A2 P3 L 1 20.3 20.2 142. 1 3 1 ## 10 A2 P4 H 2 20.5 19.6 144. 2 6 4 ## 11 A2 P4 M 3 21.5 19.1 144. 5 9 7 ## 12 A2 P4 L 4 22.2 18.3 145. 6 12 10
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Rank of bins
data.1 %>% mutate(ntile(Temp,4))
## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG `ntile(Temp, 4)` ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <int> ## 1 A1 P1 H 1 25.8 14.9 145. 4 ## 2 A1 P1 M 2 24.3 16.2 142. 3 ## 3 A1 P1 L 3 25.0 15.5 144. 3 ## 4 A1 P2 H 4 25.7 15.0 146. 4 ## 5 A1 P2 M 1 25.1 15.6 148. 4 ## 6 A1 P2 L 2 24.9 15.7 145. 3 ## 7 A2 P3 H 3 21.0 19.7 146. 1 ## 8 A2 P3 M 4 21.4 19.3 145. 2 ## 9 A2 P3 L 1 20.3 20.2 142. 1 ## 10 A2 P4 H 2 20.5 19.6 144. 1 ## 11 A2 P4 M 3 21.5 19.1 144. 2 ## 12 A2 P4 L 4 22.2 18.3 145. 2
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Logical bins
data.1 %>% mutate(between(Temp,20,25))
## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG `between(Temp, 20, 25)` ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <lgl> ## 1 A1 P1 H 1 25.8 14.9 145. FALSE ## 2 A1 P1 M 2 24.3 16.2 142. TRUE ## 3 A1 P1 L 3 25.0 15.5 144. TRUE ## 4 A1 P2 H 4 25.7 15.0 146. FALSE ## 5 A1 P2 M 1 25.1 15.6 148. FALSE ## 6 A1 P2 L 2 24.9 15.7 145. TRUE ## 7 A2 P3 H 3 21.0 19.7 146. TRUE ## 8 A2 P3 M 4 21.4 19.3 145. TRUE ## 9 A2 P3 L 1 20.3 20.2 142. TRUE ## 10 A2 P4 H 2 20.5 19.6 144. TRUE ## 11 A2 P4 M 3 21.5 19.1 144. TRUE ## 12 A2 P4 L 4 22.2 18.3 145. TRUE
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Categorical bins
data.1 %>% mutate(fTemp=ifelse(Temp<21, 'Low',
ifelse(between(Temp,21,25), 'Medium', 'High')))
## OR
data.1 %>% mutate(fTemp=case_when(Temp<21 ~ 'Low',
between(Temp, 21, 25) ~ 'Medium',
Temp>25 ~ 'High'))
## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG fTemp ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <chr> ## 1 A1 P1 H 1 25.8 14.9 145. High ## 2 A1 P1 M 2 24.3 16.2 142. Medium ## 3 A1 P1 L 3 25.0 15.5 144. Medium ## 4 A1 P2 H 4 25.7 15.0 146. High ## 5 A1 P2 M 1 25.1 15.6 148. High ## 6 A1 P2 L 2 24.9 15.7 145. Medium ## 7 A2 P3 H 3 21.0 19.7 146. Low ## 8 A2 P3 M 4 21.4 19.3 145. Medium ## 9 A2 P3 L 1 20.3 20.2 142. Low ## 10 A2 P4 H 2 20.5 19.6 144. Low ## 11 A2 P4 M 3 21.5 19.1 144. Medium ## 12 A2 P4 L 4 22.2 18.3 145. Medium ## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG fTemp ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <chr> ## 1 A1 P1 H 1 25.8 14.9 145. High ## 2 A1 P1 M 2 24.3 16.2 142. Medium ## 3 A1 P1 L 3 25.0 15.5 144. Medium ## 4 A1 P2 H 4 25.7 15.0 146. High ## 5 A1 P2 M 1 25.1 15.6 148. High ## 6 A1 P2 L 2 24.9 15.7 145. Medium ## 7 A2 P3 H 3 21.0 19.7 146. Low ## 8 A2 P3 M 4 21.4 19.3 145. Medium ## 9 A2 P3 L 1 20.3 20.2 142. Low ## 10 A2 P4 H 2 20.5 19.6 144. Low ## 11 A2 P4 M 3 21.5 19.1 144. Medium ## 12 A2 P4 L 4 22.2 18.3 145. Medium
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Categorical bins
data.1 %>% mutate(fTemp=cut(Temp, breaks=c(0,21,25,100),
labels=c('Low','Medium','High')))
## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG fTemp ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <fct> ## 1 A1 P1 H 1 25.8 14.9 145. High ## 2 A1 P1 M 2 24.3 16.2 142. Medium ## 3 A1 P1 L 3 25.0 15.5 144. Medium ## 4 A1 P2 H 4 25.7 15.0 146. High ## 5 A1 P2 M 1 25.1 15.6 148. High ## 6 A1 P2 L 2 24.9 15.7 145. Medium ## 7 A2 P3 H 3 21.0 19.7 146. Low ## 8 A2 P3 M 4 21.4 19.3 145. Medium ## 9 A2 P3 L 1 20.3 20.2 142. Low ## 10 A2 P4 H 2 20.5 19.6 144. Low ## 11 A2 P4 M 3 21.5 19.1 144. Medium ## 12 A2 P4 L 4 22.2 18.3 145. Medium
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Bin Latitude into Northern, Central and Southern based on Latitude
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Bin Latitude into Northern, Central and Southern based on Latitude
Assuming even spread..
# Bin LAT into three regions. With breaks = 3, cut() splits the observed
# range of LAT into three equal-width intervals; the labels are applied
# from the lowest interval upward.
data.1 %>%
  mutate(Region = cut(LAT, breaks = 3,
                      labels = c('Northern', 'Central', 'Southern')))
## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG Region ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <fct> ## 1 A1 P1 H 1 25.8 14.9 145. Northern ## 2 A1 P1 M 2 24.3 16.2 142. Northern ## 3 A1 P1 L 3 25.0 15.5 144. Northern ## 4 A1 P2 H 4 25.7 15.0 146. Northern ## 5 A1 P2 M 1 25.1 15.6 148. Northern ## 6 A1 P2 L 2 24.9 15.7 145. Northern ## 7 A2 P3 H 3 21.0 19.7 146. Southern ## 8 A2 P3 M 4 21.4 19.3 145. Southern ## 9 A2 P3 L 1 20.3 20.2 142. Southern ## 10 A2 P4 H 2 20.5 19.6 144. Southern ## 11 A2 P4 M 3 21.5 19.1 144. Southern ## 12 A2 P4 L 4 22.2 18.3 145. Central
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Bin Latitude into Northern, Central and Southern based on Latitude
# Bin LAT into three equal-width regions (labels applied from the lowest
# interval upward), then reorder the levels of Region by first appearance
# in the data with fct_inorder(). The result is assigned back to Region so
# that the Between design factor is left intact.
data.1 %>%
  mutate(Region = cut(LAT, breaks = 3,
                      labels = c('Northern', 'Central', 'Southern'))) %>%
  mutate(Region = fct_inorder(Region))
## # A tibble: 12 x 8 ## Between Plot Cond Time Temp LAT LONG Region ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <fct> ## 1 A1 P1 H 1 25.8 14.9 145. Northern ## 2 A1 P1 M 2 24.3 16.2 142. Northern ## 3 A1 P1 L 3 25.0 15.5 144. Northern ## 4 A1 P2 H 4 25.7 15.0 146. Northern ## 5 A1 P2 M 1 25.1 15.6 148. Northern ## 6 A1 P2 L 2 24.9 15.7 145. Northern ## 7 A2 P3 H 3 21.0 19.7 146. Southern ## 8 A2 P3 M 4 21.4 19.3 145. Southern ## 9 A2 P3 L 1 20.3 20.2 142. Southern ## 10 A2 P4 H 2 20.5 19.6 144. Southern ## 11 A2 P4 M 3 21.5 19.1 144. Southern ## 12 A2 P4 L 4 22.2 18.3 145. Central
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% summarise(MeanTemp=mean(Temp), VarTemp=var(Temp), N=n())
## # A tibble: 1 x 3 ## MeanTemp VarTemp N ## <dbl> <dbl> <int> ## 1 23.1 4.66 12
# Standard error of the mean: sd(x) / sqrt(n).
#
# x      numeric vector.
# na.rm  if TRUE, missing values are dropped first, so that both the
#        standard deviation and n are based on the observed values only.
#        Defaults to FALSE, in which case any NA in x gives NA.
#
# Returns a single numeric value.
SE <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}
data.1 %>% summarise(MeanTemp=mean(Temp), VarTemp=var(Temp),
SEM=SE(Temp))
## # A tibble: 1 x 3 ## MeanTemp VarTemp SEM ## <dbl> <dbl> <dbl> ## 1 23.1 4.66 0.623
Across versions
data.1 %>% summarise(across(c(Temp,LAT), list(Mean=mean, Var=var)))
## # A tibble: 1 x 4 ## Temp_Mean Temp_Var LAT_Mean LAT_Var ## <dbl> <dbl> <dbl> <dbl> ## 1 23.1 4.66 17.4 4.37
data.1 %>% summarise(across(where(is.numeric), list(Mean=mean, Var=var)))
## # A tibble: 1 x 8 ## Time_Mean Time_Var Temp_Mean Temp_Var LAT_Mean LAT_Var LONG_Mean LONG_Var ## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 2.5 1.36 23.1 4.66 17.4 4.37 145. 2.32
data.1 %>% summarize( across(where(is.numeric), mean),
across(where(is.factor), length))
## # A tibble: 1 x 7 ## Time Temp LAT LONG Between Plot Cond ## <dbl> <dbl> <dbl> <dbl> <int> <int> <int> ## 1 2.5 23.1 17.4 145. 12 12 12
data.1 %>% count(Cond)
## # A tibble: 3 x 2 ## Cond n ## <fct> <int> ## 1 H 4 ## 2 L 4 ## 3 M 4
data.1 %>% count(Cond,between(Temp,20,30))
## # A tibble: 3 x 3 ## Cond `between(Temp, 20, 30)` n ## <fct> <lgl> <int> ## 1 H TRUE 4 ## 2 L TRUE 4 ## 3 M TRUE 4
data.1 %>% head(6)
## # A tibble: 6 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 M 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145.
data.1 %>% group_by(Between,Plot) %>%
summarise(Mean=mean(Temp))
## # A tibble: 4 x 3 ## # Groups: Between [2] ## Between Plot Mean ## <fct> <fct> <dbl> ## 1 A1 P1 25.0 ## 2 A1 P2 25.2 ## 3 A2 P3 20.9 ## 4 A2 P4 21.4
data.1 %>% head(6)
## # A tibble: 6 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 M 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145.
data.1 %>% group_by(Between,Plot) %>%
summarise(Mean=mean(Temp), Var=var(Temp), N=n(),First=first(Temp))
## # A tibble: 4 x 6 ## # Groups: Between [2] ## Between Plot Mean Var N First ## <fct> <fct> <dbl> <dbl> <int> <dbl> ## 1 A1 P1 25.0 0.528 3 25.8 ## 2 A1 P2 25.2 0.202 3 25.7 ## 3 A2 P3 20.9 0.280 3 21.0 ## 4 A2 P4 21.4 0.724 3 20.5
mutate vs summarise
data.1 %>% group_by(Between,Plot) %>%
summarise(Mean=mean(Temp))
## # A tibble: 4 x 3 ## # Groups: Between [2] ## Between Plot Mean ## <fct> <fct> <dbl> ## 1 A1 P1 25.0 ## 2 A1 P2 25.2 ## 3 A2 P3 20.9 ## 4 A2 P4 21.4
data.1 %>% group_by(Between,Plot) %>%
mutate(Mean=mean(Temp))
## # A tibble: 12 x 8 ## # Groups: Between, Plot [4] ## Between Plot Cond Time Temp LAT LONG Mean ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. 25.0 ## 2 A1 P1 M 2 24.3 16.2 142. 25.0 ## 3 A1 P1 L 3 25.0 15.5 144. 25.0 ## 4 A1 P2 H 4 25.7 15.0 146. 25.2 ## 5 A1 P2 M 1 25.1 15.6 148. 25.2 ## 6 A1 P2 L 2 24.9 15.7 145. 25.2 ## 7 A2 P3 H 3 21.0 19.7 146. 20.9 ## 8 A2 P3 M 4 21.4 19.3 145. 20.9 ## 9 A2 P3 L 1 20.3 20.2 142. 20.9 ## 10 A2 P4 H 2 20.5 19.6 144. 21.4 ## 11 A2 P4 M 3 21.5 19.1 144. 21.4 ## 12 A2 P4 L 4 22.2 18.3 145. 21.4
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% group_by(Between,Plot) %>% mutate(Mean=mean(Temp), cTemp=Temp-Mean)
## # A tibble: 12 x 9 ## # Groups: Between, Plot [4] ## Between Plot Cond Time Temp LAT LONG Mean cTemp ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. 25.0 0.753 ## 2 A1 P1 M 2 24.3 16.2 142. 25.0 -0.696 ## 3 A1 P1 L 3 25.0 15.5 144. 25.0 -0.0574 ## 4 A1 P2 H 4 25.7 15.0 146. 25.2 0.509 ## 5 A1 P2 M 1 25.1 15.6 148. 25.2 -0.169 ## 6 A1 P2 L 2 24.9 15.7 145. 25.2 -0.340 ## 7 A2 P3 H 3 21.0 19.7 146. 20.9 0.0746 ## 8 A2 P3 M 4 21.4 19.3 145. 20.9 0.488 ## 9 A2 P3 L 1 20.3 20.2 142. 20.9 -0.562 ## 10 A2 P4 H 2 20.5 19.6 144. 21.4 -0.905 ## 11 A2 P4 M 3 21.5 19.1 144. 21.4 0.122 ## 12 A2 P4 L 4 22.2 18.3 145. 21.4 0.783
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% group_by(Between,Plot) %>% summarise(across(everything(), mean))
## # A tibble: 4 x 7 ## # Groups: Between [2] ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 A1 P1 NA 2 25.0 15.5 144. ## 2 A1 P2 NA 2.33 25.2 15.4 146. ## 3 A2 P3 NA 2.67 20.9 19.7 144. ## 4 A2 P4 NA 3 21.4 19.0 144.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
# Mean of every remaining column within each Between/Plot combination.
# Cond and Time are dropped first so that only Temp, LAT and LONG are
# averaged. across() replaces the superseded summarise_all(); with a single
# function the output columns keep their original names.
data.1 %>%
  select(-Cond, -Time) %>%
  group_by(Between, Plot) %>%
  summarise(across(everything(), mean))
## # A tibble: 4 x 5 ## # Groups: Between [2] ## Between Plot Temp LAT LONG ## <fct> <fct> <dbl> <dbl> <dbl> ## 1 A1 P1 25.0 15.5 144. ## 2 A1 P2 25.2 15.4 146. ## 3 A2 P3 20.9 19.7 144. ## 4 A2 P4 21.4 19.0 144.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% group_by(Between,Plot) %>%
summarise(across(c(Temp, LAT, LONG), list(Mean=mean, SE=SE)))
## # A tibble: 4 x 8 ## # Groups: Between [2] ## Between Plot Temp_Mean Temp_SE LAT_Mean LAT_SE LONG_Mean LONG_SE ## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 A1 P1 25.0 0.419 15.5 0.351 144. 0.791 ## 2 A1 P2 25.2 0.259 15.4 0.242 146. 0.855 ## 3 A2 P3 20.9 0.305 19.7 0.262 144. 1.06 ## 4 A2 P4 21.4 0.491 19.0 0.379 144. 0.231
Calculate for each year, the mean abundance of Pocillopora damicornis
tikus[1:10,c(1:3,76:77)]
## Psammocora contigua Psammocora digitata Pocillopora damicornis time rep ## V1 0 0 79 81 1 ## V2 0 0 51 81 2 ## V3 0 0 42 81 3 ## V4 0 0 15 81 4 ## V5 0 0 9 81 5 ## V6 0 0 72 81 6 ## V7 0 0 0 81 7 ## V8 0 0 16 81 8 ## V9 0 0 0 81 9 ## V10 0 0 16 81 10
NOTE to operate on columns whose names contain special characters (including spaces), you must use `` (backticks).
tikus %>% arrange(`Pocillopora damicornis`)
Calculate for each year, the mean abundance of Pocillopora damicornis
tikus %>% group_by(time) %>%
summarise(MeanAbundance=mean(`Pocillopora damicornis`))
## # A tibble: 6 x 2 ## time MeanAbundance ## <fct> <dbl> ## 1 81 30 ## 2 83 0 ## 3 84 0 ## 4 85 0 ## 5 87 1.8 ## 6 88 4
Calculate for each year, the number of samples as well as the mean and variance of ozone
# Coerce nasa to a plain data frame, then preview its first rows.
nasa <- as.data.frame(nasa)
head(nasa)
## lat long month year cloudhigh cloudlow cloudmid ozone pressure surftemp temperature ## 1 36.20000 -113.8 1 1995 26.0 7.5 34.5 304 835 272.7 272.1 ## 2 33.70435 -113.8 1 1995 20.0 11.5 32.5 304 940 279.5 282.2 ## 3 31.20870 -113.8 1 1995 16.0 16.5 26.0 298 960 284.7 285.2 ## 4 28.71304 -113.8 1 1995 13.0 20.5 14.5 276 990 289.3 290.7 ## 5 26.21739 -113.8 1 1995 7.5 26.0 10.5 274 1000 292.2 292.7 ## 6 23.72174 -113.8 1 1995 8.0 30.0 9.5 264 1000 294.1 293.6
Calculate for each year, the number of samples as well as the mean and variance of ozone
nasa %>% group_by(year) %>%
summarise(N=n(),Mean=mean(ozone), Var=var(ozone))
## # A tibble: 6 x 4 ## year N Mean Var ## <int> <int> <dbl> <dbl> ## 1 1995 6912 264. 258. ## 2 1996 6912 267. 326. ## 3 1997 6912 266. 327. ## 4 1998 6912 267. 507. ## 5 1999 6912 270. 368. ## 6 2000 6912 269. 353.
select)data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% select(Between,Plot,Cond,Time,Temp)
## # A tibble: 12 x 5 ## Between Plot Cond Time Temp ## <fct> <fct> <fct> <int> <dbl> ## 1 A1 P1 H 1 25.8 ## 2 A1 P1 M 2 24.3 ## 3 A1 P1 L 3 25.0 ## 4 A1 P2 H 4 25.7 ## 5 A1 P2 M 1 25.1 ## 6 A1 P2 L 2 24.9 ## 7 A2 P3 H 3 21.0 ## 8 A2 P3 M 4 21.4 ## 9 A2 P3 L 1 20.3 ## 10 A2 P4 H 2 20.5 ## 11 A2 P4 M 3 21.5 ## 12 A2 P4 L 4 22.2
select)data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% select(-LAT,-LONG)
## # A tibble: 12 x 5 ## Between Plot Cond Time Temp ## <fct> <fct> <fct> <int> <dbl> ## 1 A1 P1 H 1 25.8 ## 2 A1 P1 M 2 24.3 ## 3 A1 P1 L 3 25.0 ## 4 A1 P2 H 4 25.7 ## 5 A1 P2 M 1 25.1 ## 6 A1 P2 L 2 24.9 ## 7 A2 P3 H 3 21.0 ## 8 A2 P3 M 4 21.4 ## 9 A2 P3 L 1 20.3 ## 10 A2 P4 H 2 20.5 ## 11 A2 P4 M 3 21.5 ## 12 A2 P4 L 4 22.2
select) contains(), ends_with(), starts_with(), matches(), everything(), across() - must evaluate to TRUE/FALSE
select)data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% select(contains('L'))
## # A tibble: 12 x 3 ## Plot LAT LONG ## <fct> <dbl> <dbl> ## 1 P1 14.9 145. ## 2 P1 16.2 142. ## 3 P1 15.5 144. ## 4 P2 15.0 146. ## 5 P2 15.6 148. ## 6 P2 15.7 145. ## 7 P3 19.7 146. ## 8 P3 19.3 145. ## 9 P3 20.2 142. ## 10 P4 19.6 144. ## 11 P4 19.1 144. ## 12 P4 18.3 145.
select)data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% select(starts_with('L'))
## # A tibble: 12 x 2 ## LAT LONG ## <dbl> <dbl> ## 1 14.9 145. ## 2 16.2 142. ## 3 15.5 144. ## 4 15.0 146. ## 5 15.6 148. ## 6 15.7 145. ## 7 19.7 146. ## 8 19.3 145. ## 9 20.2 142. ## 10 19.6 144. ## 11 19.1 144. ## 12 18.3 145.
select)data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% select(ends_with('t'))
## # A tibble: 12 x 2 ## Plot LAT ## <fct> <dbl> ## 1 P1 14.9 ## 2 P1 16.2 ## 3 P1 15.5 ## 4 P2 15.0 ## 5 P2 15.6 ## 6 P2 15.7 ## 7 P3 19.7 ## 8 P3 19.3 ## 9 P3 20.2 ## 10 P4 19.6 ## 11 P4 19.1 ## 12 P4 18.3
select)data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% select(matches('^T[a-z]m.'))
## # A tibble: 12 x 2 ## Time Temp ## <int> <dbl> ## 1 1 25.8 ## 2 2 24.3 ## 3 3 25.0 ## 4 4 25.7 ## 5 1 25.1 ## 6 2 24.9 ## 7 3 21.0 ## 8 4 21.4 ## 9 1 20.3 ## 10 2 20.5 ## 11 3 21.5 ## 12 4 22.2
https://github.com/rstudio/cheatsheets/raw/master/regex.pdf
local repo version of regex.pdf
select)data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% select(Between:Temp)
## # A tibble: 12 x 5 ## Between Plot Cond Time Temp ## <fct> <fct> <fct> <int> <dbl> ## 1 A1 P1 H 1 25.8 ## 2 A1 P1 M 2 24.3 ## 3 A1 P1 L 3 25.0 ## 4 A1 P2 H 4 25.7 ## 5 A1 P2 M 1 25.1 ## 6 A1 P2 L 2 24.9 ## 7 A2 P3 H 3 21.0 ## 8 A2 P3 M 4 21.4 ## 9 A2 P3 L 1 20.3 ## 10 A2 P4 H 2 20.5 ## 11 A2 P4 M 3 21.5 ## 12 A2 P4 L 4 22.2
nasa %>% head()
## lat long month year cloudhigh cloudlow cloudmid ozone pressure surftemp temperature ## 1 36.20000 -113.8 1 1995 26.0 7.5 34.5 304 835 272.7 272.1 ## 2 33.70435 -113.8 1 1995 20.0 11.5 32.5 304 940 279.5 282.2 ## 3 31.20870 -113.8 1 1995 16.0 16.5 26.0 298 960 284.7 285.2 ## 4 28.71304 -113.8 1 1995 13.0 20.5 14.5 276 990 289.3 290.7 ## 5 26.21739 -113.8 1 1995 7.5 26.0 10.5 274 1000 292.2 292.7 ## 6 23.72174 -113.8 1 1995 8.0 30.0 9.5 264 1000 294.1 293.6
Select lat, long, and cloud.. columns
nasa %>% head()
## lat long month year cloudhigh cloudlow cloudmid ozone pressure surftemp temperature ## 1 36.20000 -113.8 1 1995 26.0 7.5 34.5 304 835 272.7 272.1 ## 2 33.70435 -113.8 1 1995 20.0 11.5 32.5 304 940 279.5 282.2 ## 3 31.20870 -113.8 1 1995 16.0 16.5 26.0 298 960 284.7 285.2 ## 4 28.71304 -113.8 1 1995 13.0 20.5 14.5 276 990 289.3 290.7 ## 5 26.21739 -113.8 1 1995 7.5 26.0 10.5 274 1000 292.2 292.7 ## 6 23.72174 -113.8 1 1995 8.0 30.0 9.5 264 1000 294.1 293.6
nasa %>% select(lat, long, starts_with("cloud")) %>% head
## lat long cloudhigh cloudlow cloudmid ## 1 36.20000 -113.8 26.0 7.5 34.5 ## 2 33.70435 -113.8 20.0 11.5 32.5 ## 3 31.20870 -113.8 16.0 16.5 26.0 ## 4 28.71304 -113.8 13.0 20.5 14.5 ## 5 26.21739 -113.8 7.5 26.0 10.5 ## 6 23.72174 -113.8 8.0 30.0 9.5
tikus[1:10,c(1:3,76:77)]
## Psammocora contigua Psammocora digitata Pocillopora damicornis time rep ## V1 0 0 79 81 1 ## V2 0 0 51 81 2 ## V3 0 0 42 81 3 ## V4 0 0 15 81 4 ## V5 0 0 9 81 5 ## V6 0 0 72 81 6 ## V7 0 0 0 81 7 ## V8 0 0 16 81 8 ## V9 0 0 0 81 9 ## V10 0 0 16 81 10
Select rep, time and only Species that DON'T contain pora
Select rep, time and only Species that DON'T contain pora
# Drop every column whose name contains 'pora'. rep and time are kept because
# their names do not match. The data set is `tikus` (it was misspelt `tikas`).
tikus %>% dplyr::select(-contains('pora'))
## OR if we wanted to alter the order...
tikus %>% dplyr::select(rep, time, everything(),-contains('pora'))
dplyr::select(tikus, `Pocillopora damicornis`)
## Pocillopora damicornis ## V1 79 ## V2 51 ## V3 42 ## V4 15 ## V5 9 ## V6 72 ## V7 0 ## V8 16 ## V9 0 ## V10 16 ## V11 0 ## V12 0 ## V13 0 ## V14 0 ## V15 0 ## V16 0 ## V17 0 ## V18 0 ## V19 0 ## V20 0 ## V21 0 ## V22 0 ## V23 0 ## V24 0 ## V25 0 ## V26 0 ## V27 0 ## V28 0 ## V29 0 ## V30 0 ## V31 0 ## V32 0 ## V33 0 ## V34 0 ## V35 0 ## V36 0 ## V37 0 ## V38 0 ## V39 0 ## V40 0 ## V41 18 ## V42 0 ## V43 0 ## V44 0 ## V45 0 ## V46 0 ## V47 0 ## V48 0 ## V49 0 ## V50 0 ## V51 0 ## V52 0 ## V53 0 ## V54 0 ## V55 0 ## V56 0 ## V57 10 ## V58 0 ## V59 30 ## V60 0
data.1 %>% pull(Temp)
## [1] 25.77507 24.32564 24.96428 25.73127 25.05280 24.88189 20.97769 21.39090 20.34081 ## [10] 20.48899 21.51637 22.17791
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% rename(Condition=Cond, Temperature=Temp)
## # A tibble: 12 x 7 ## Between Plot Condition Time Temperature LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 M 1 25.1 15.6 148. ## 6 A1 P2 L 2 24.9 15.7 145. ## 7 A2 P3 H 3 21.0 19.7 146. ## 8 A2 P3 M 4 21.4 19.3 145. ## 9 A2 P3 L 1 20.3 20.2 142. ## 10 A2 P4 H 2 20.5 19.6 144. ## 11 A2 P4 M 3 21.5 19.1 144. ## 12 A2 P4 L 4 22.2 18.3 145.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% filter(Cond=='H')
## # A tibble: 4 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P2 H 4 25.7 15.0 146. ## 3 A2 P3 H 3 21.0 19.7 146. ## 4 A2 P4 H 2 20.5 19.6 144.
data.1 %>% filter(Cond %in% c('H','M'))
## # A tibble: 8 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P2 H 4 25.7 15.0 146. ## 4 A1 P2 M 1 25.1 15.6 148. ## 5 A2 P3 H 3 21.0 19.7 146. ## 6 A2 P3 M 4 21.4 19.3 145. ## 7 A2 P4 H 2 20.5 19.6 144. ## 8 A2 P4 M 3 21.5 19.1 144.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% filter(Cond=='H' & Temp<25)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A2 P3 H 3 21.0 19.7 146. ## 2 A2 P4 H 2 20.5 19.6 144.
data.1 %>% filter(Cond=='H' | Temp<25)
## # A tibble: 11 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A1 P2 L 2 24.9 15.7 145. ## 6 A2 P3 H 3 21.0 19.7 146. ## 7 A2 P3 M 4 21.4 19.3 145. ## 8 A2 P3 L 1 20.3 20.2 142. ## 9 A2 P4 H 2 20.5 19.6 144. ## 10 A2 P4 M 3 21.5 19.1 144. ## 11 A2 P4 L 4 22.2 18.3 145.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Keep only those rows with Temp less than 20 and either LAT greater than 20 or LONG less than 145
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
Keep only those rows with Temp less than 20 and either LAT greater than 20 or LONG less than 145
data.1 %>% filter(Temp<20 & (LAT>20 | LONG <145))
## # A tibble: 0 x 7 ## # … with 7 variables: Between <fct>, Plot <fct>, Cond <fct>, Time <int>, Temp <dbl>, ## # LAT <dbl>, LONG <dbl>
glimpse(nasa)
## Rows: 41,472 ## Columns: 11 ## $ lat <dbl> 36.200000, 33.704348, 31.208696, 28.713043, 26.217391, 23.721739, 2… ## $ long <dbl> -113.8000, -113.8000, -113.8000, -113.8000, -113.8000, -113.8000, -… ## $ month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1… ## $ year <int> 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1… ## $ cloudhigh <dbl> 26.0, 20.0, 16.0, 13.0, 7.5, 8.0, 14.5, 19.5, 22.5, 21.0, 19.0, 16.… ## $ cloudlow <dbl> 7.5, 11.5, 16.5, 20.5, 26.0, 30.0, 29.5, 26.5, 27.5, 26.0, 28.5, 28… ## $ cloudmid <dbl> 34.5, 32.5, 26.0, 14.5, 10.5, 9.5, 11.0, 17.5, 18.5, 16.5, 12.5, 13… ## $ ozone <dbl> 304, 304, 298, 276, 274, 264, 258, 252, 250, 250, 248, 248, 250, 24… ## $ pressure <dbl> 835, 940, 960, 990, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,… ## $ surftemp <dbl> 272.7, 279.5, 284.7, 289.3, 292.2, 294.1, 295.0, 298.3, 300.1, 300.… ## $ temperature <dbl> 272.1, 282.2, 285.2, 290.7, 292.7, 293.6, 294.6, 296.9, 297.8, 298.…
Filter to the five largest ozone values for the second month of the last year
Filter to the five largest ozone values for the second month of the last year
# Three equivalent ways to keep the five largest ozone readings recorded in
# month 2 of the most recent year.

# Sort descending, then take the first five rows with head().
nasa %>% filter(year == max(year) & month == 2) %>%
  arrange(desc(ozone)) %>% head(5)
# Same, but taking the first five rows with slice().
nasa %>% filter(year == max(year) & month == 2) %>%
  arrange(desc(ozone)) %>% slice(1:5)
##OR
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties, so
# it can return more than five rows, and it returns them sorted by ozone.
nasa %>% filter(year == max(year) & month == 2) %>%
  slice_max(ozone, n = 5)
Filter to all ozone values between 320 and 325 in the first month of the last year
glimpse(nasa)
## Rows: 41,472 ## Columns: 11 ## $ lat <dbl> 36.200000, 33.704348, 31.208696, 28.713043, 26.217391, 23.721739, 2… ## $ long <dbl> -113.8000, -113.8000, -113.8000, -113.8000, -113.8000, -113.8000, -… ## $ month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1… ## $ year <int> 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1… ## $ cloudhigh <dbl> 26.0, 20.0, 16.0, 13.0, 7.5, 8.0, 14.5, 19.5, 22.5, 21.0, 19.0, 16.… ## $ cloudlow <dbl> 7.5, 11.5, 16.5, 20.5, 26.0, 30.0, 29.5, 26.5, 27.5, 26.0, 28.5, 28… ## $ cloudmid <dbl> 34.5, 32.5, 26.0, 14.5, 10.5, 9.5, 11.0, 17.5, 18.5, 16.5, 12.5, 13… ## $ ozone <dbl> 304, 304, 298, 276, 274, 264, 258, 252, 250, 250, 248, 248, 250, 24… ## $ pressure <dbl> 835, 940, 960, 990, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,… ## $ surftemp <dbl> 272.7, 279.5, 284.7, 289.3, 292.2, 294.1, 295.0, 298.3, 300.1, 300.… ## $ temperature <dbl> 272.1, 282.2, 285.2, 290.7, 292.7, 293.6, 294.6, 296.9, 297.8, 298.…
Filter to all ozone values between 320 and 325 in the first month of the last year
# "First month" and "last year" are taken as the smallest month and the largest
# year via min()/max(), so the result does not depend on the row order of nasa.
# between() is inclusive at both ends, so the explicit comparison uses >= / <=
# to make the two alternatives return the same rows.
nasa %>% filter(ozone >= 320 & ozone <= 325, month == min(month),
                year == max(year))
##OR
nasa %>% filter(between(ozone, 320, 325), month == min(month),
                year == max(year))
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% slice(1:4)
## # A tibble: 4 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146.
data.1 %>% slice(c(1:4,7))
## # A tibble: 5 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A1 P2 H 4 25.7 15.0 146. ## 5 A2 P3 H 3 21.0 19.7 146.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% sample_n(10, replace=TRUE)
## # A tibble: 10 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 L 3 25.0 15.5 144. ## 3 A1 P1 L 3 25.0 15.5 144. ## 4 A2 P3 M 4 21.4 19.3 145. ## 5 A1 P2 L 2 24.9 15.7 145. ## 6 A2 P3 H 3 21.0 19.7 146. ## 7 A2 P4 L 4 22.2 18.3 145. ## 8 A1 P2 L 2 24.9 15.7 145. ## 9 A2 P3 M 4 21.4 19.3 145. ## 10 A2 P3 H 3 21.0 19.7 146.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
data.1 %>% sample_frac(0.5, replace=TRUE)
## # A tibble: 6 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A2 P4 M 3 21.5 19.1 144. ## 2 A1 P1 H 1 25.8 14.9 145. ## 3 A1 P2 H 4 25.7 15.0 146. ## 4 A2 P3 M 4 21.4 19.3 145. ## 5 A2 P3 L 1 20.3 20.2 142. ## 6 A2 P3 L 1 20.3 20.2 142.
data.1 %>% head(2)
## # A tibble: 2 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142.
# examine the levels of the Cond factor
levels(data.1$Cond)
## [1] "H" "L" "M"
# subset the dataset to just Plot P1
data.3 <- data.1 %>% filter(Plot == 'P1')
# examine subset data
data.3
## # A tibble: 3 x 7 ## Between Plot Cond Time Temp LAT LONG ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 A1 P1 H 1 25.8 14.9 145. ## 2 A1 P1 M 2 24.3 16.2 142. ## 3 A1 P1 L 3 25.0 15.5 144.
# examine the levels of the Cond factor
levels(data.3$Cond)
## [1] "H" "L" "M"
levels(data.3$Plot)
## [1] "P1" "P2" "P3" "P4"
levels(data.3$Between)
## [1] "A1" "A2"
# subset the dataset to just Plot P1
data.3 <- data.1 %>% filter(Plot == 'P1')
# drop the unused factor levels from all factors
data.3 <- data.3 %>% droplevels()
# examine the levels of each factor
levels(data.3$Cond)
## [1] "H" "L" "M"
levels(data.3$Plot)
## [1] "P1"
levels(data.3$Between)
## [1] "A1"
# subset the dataset to just Plot P1
data.3 <- data.1 %>% filter(Plot == 'P1')
# drop the unused factor levels from Plot only (other factors keep theirs)
data.3 <- data.3 %>% mutate(Plot = factor(Plot))
# examine the levels of each factor
levels(data.3$Cond)
## [1] "H" "L" "M"
levels(data.3$Plot)
## [1] "P1"
levels(data.3$Between)
## [1] "A1" "A2"
| Between | Plot | Time.0 | Time.1 | Time.2 | |
|---|---|---|---|---|---|
| R1 | A1 | P1 | 8 | 14 | 14 |
| R2 | A1 | P2 | 10 | 12 | 11 |
| R3 | A2 | P3 | 7 | 11 | 8 |
| R4 | A2 | P4 | 11 | 9 | 2 |
data.w %>% pivot_longer(Time.0:Time.2, names_to = 'Time',
values_to='Count')
## OR
data.w %>% pivot_longer(c(-Between, -Plot), names_to = 'Time',
values_to='Count')
## # A tibble: 12 x 4 ## Between Plot Time Count ## <fct> <fct> <chr> <int> ## 1 A1 P1 Time.0 8 ## 2 A1 P1 Time.1 14 ## 3 A1 P1 Time.2 14 ## 4 A1 P2 Time.0 10 ## 5 A1 P2 Time.1 12 ## 6 A1 P2 Time.2 11 ## 7 A2 P3 Time.0 7 ## 8 A2 P3 Time.1 11 ## 9 A2 P3 Time.2 8 ## 10 A2 P4 Time.0 11 ## 11 A2 P4 Time.1 9 ## 12 A2 P4 Time.2 2 ## # A tibble: 12 x 4 ## Between Plot Time Count ## <fct> <fct> <chr> <int> ## 1 A1 P1 Time.0 8 ## 2 A1 P1 Time.1 14 ## 3 A1 P1 Time.2 14 ## 4 A1 P2 Time.0 10 ## 5 A1 P2 Time.1 12 ## 6 A1 P2 Time.2 11 ## 7 A2 P3 Time.0 7 ## 8 A2 P3 Time.1 11 ## 9 A2 P3 Time.2 8 ## 10 A2 P4 Time.0 11 ## 11 A2 P4 Time.1 9 ## 12 A2 P4 Time.2 2
| Between | Plot | Time.0 | Time.1 | Time.2 | |
|---|---|---|---|---|---|
| R1 | A1 | P1 | 8 | 14 | 14 |
| R2 | A1 | P2 | 10 | 12 | 11 |
| R3 | A2 | P3 | 7 | 11 | 8 |
| R4 | A2 | P4 | 11 | 9 | 2 |
## OR
data.w %>% pivot_longer(starts_with('Time'), names_to = 'Time',
values_to='Count',
names_prefix='Time.')
## # A tibble: 12 x 4 ## Between Plot Time Count ## <fct> <fct> <chr> <int> ## 1 A1 P1 0 8 ## 2 A1 P1 1 14 ## 3 A1 P1 2 14 ## 4 A1 P2 0 10 ## 5 A1 P2 1 12 ## 6 A1 P2 2 11 ## 7 A2 P3 0 7 ## 8 A2 P3 1 11 ## 9 A2 P3 2 8 ## 10 A2 P4 0 11 ## 11 A2 P4 1 9 ## 12 A2 P4 2 2
| Resp1 | Resp2 | Between | Plot | Subplot | Within |
|---|---|---|---|---|---|
| 8 | 17 | A1 | P1 | S1 | B1 |
| 10 | 18 | A1 | P1 | S1 | B2 |
| 7 | 17 | A1 | P1 | S2 | B1 |
| 11 | 21 | A1 | P1 | S2 | B2 |
| 14 | 19 | A2 | P2 | S3 | B1 |
| 12 | 13 | A2 | P2 | S3 | B2 |
| 11 | 24 | A2 | P2 | S4 | B1 |
| 9 | 18 | A2 | P2 | S4 | B2 |
| 14 | 25 | A3 | P3 | S5 | B1 |
| 11 | 18 | A3 | P3 | S5 | B2 |
| 8 | 27 | A3 | P3 | S6 | B1 |
| 2 | 22 | A3 | P3 | S6 | B2 |
| 8 | 17 | A1 | P4 | S7 | B1 |
| 10 | 22 | A1 | P4 | S7 | B2 |
| 7 | 16 | A1 | P4 | S8 | B1 |
| 12 | 13 | A1 | P4 | S8 | B2 |
| 11 | 23 | A2 | P5 | S9 | B1 |
| 12 | 19 | A2 | P5 | S9 | B2 |
| 12 | 23 | A2 | P5 | S10 | B1 |
| 10 | 21 | A2 | P5 | S10 | B2 |
| 3 | 17 | A3 | P6 | S11 | B1 |
| 11 | 16 | A3 | P6 | S11 | B2 |
| 13 | 26 | A3 | P6 | S12 | B1 |
| 7 | 28 | A3 | P6 | S12 | B2 |
data %>% head(2)
## Resp1 Resp2 Between Plot Subplot Within ## 1 8 17 A1 P1 S1 B1 ## 2 10 18 A1 P1 S1 B2
Widen Resp1 for repeated measures (Within)
data %>% select(-Resp2) %>% pivot_wider(names_from=Within, values_from=c(Resp1))
## # A tibble: 12 x 5 ## Between Plot Subplot B1 B2 ## <fct> <fct> <fct> <int> <int> ## 1 A1 P1 S1 8 10 ## 2 A1 P1 S2 7 11 ## 3 A2 P2 S3 14 12 ## 4 A2 P2 S4 11 9 ## 5 A3 P3 S5 14 11 ## 6 A3 P3 S6 8 2 ## 7 A1 P4 S7 8 10 ## 8 A1 P4 S8 7 12 ## 9 A2 P5 S9 11 12 ## 10 A2 P5 S10 12 10 ## 11 A3 P6 S11 3 11 ## 12 A3 P6 S12 13 7
Widen Resp1 and Resp2 for repeated measures (Within)
data %>% head(2)
## Resp1 Resp2 Between Plot Subplot Within ## 1 8 17 A1 P1 S1 B1 ## 2 10 18 A1 P1 S1 B2
data %>% pivot_wider(names_from=Within, values_from=c(Resp1, Resp2))
## # A tibble: 12 x 7 ## Between Plot Subplot Resp1_B1 Resp1_B2 Resp2_B1 Resp2_B2 ## <fct> <fct> <fct> <int> <int> <int> <int> ## 1 A1 P1 S1 8 10 17 18 ## 2 A1 P1 S2 7 11 17 21 ## 3 A2 P2 S3 14 12 19 13 ## 4 A2 P2 S4 11 9 24 18 ## 5 A3 P3 S5 14 11 25 18 ## 6 A3 P3 S6 8 2 27 22 ## 7 A1 P4 S7 8 10 17 22 ## 8 A1 P4 S8 7 12 16 13 ## 9 A2 P5 S9 11 12 23 19 ## 10 A2 P5 S10 12 10 23 21 ## 11 A3 P6 S11 3 11 17 16 ## 12 A3 P6 S12 13 7 26 28
Bio data (missing Subplot 3)
| Resp1 | Resp2 | Between | Plot | Subplot | |
|---|---|---|---|---|---|
| 1 | 8 | 18 | A1 | P1 | S1 |
| 2 | 10 | 21 | A1 | P1 | S2 |
| 4 | 11 | 23 | A1 | P2 | S4 |
| 5 | 14 | 22 | A2 | P3 | S5 |
| 6 | 12 | 24 | A2 | P3 | S6 |
| 7 | 11 | 23 | A2 | P4 | S7 |
| 8 | 9 | 20 | A2 | P4 | S8 |
| 9 | 14 | 11 | A3 | P5 | S9 |
| 10 | 11 | 22 | A3 | P5 | S10 |
| 11 | 8 | 24 | A3 | P6 | S11 |
| 12 | 2 | 16 | A3 | P6 | S12 |
Physio-chemical data (missing S7)
| Chem1 | Chem2 | Between | Plot | Subplot | |
|---|---|---|---|---|---|
| 1 | 1.453 | 0.8858 | A1 | P1 | S1 |
| 2 | 3.266 | 0.18 | A1 | P1 | S2 |
| 3 | 1.179 | 5.078 | A1 | P2 | S3 |
| 4 | 13.4 | 1.576 | A1 | P2 | S4 |
| 5 | 3.779 | 1.622 | A2 | P3 | S5 |
| 6 | 1.197 | 4.237 | A2 | P3 | S6 |
| 8 | 5.688 | 2.986 | A2 | P4 | S8 |
| 9 | 4.835 | 4.133 | A3 | P5 | S9 |
| 10 | 2.003 | 3.604 | A3 | P5 | S10 |
| 11 | 12.33 | 1.776 | A3 | P6 | S11 |
| 12 | 4.014 | 0.2255 | A3 | P6 | S12 |
Merge bio and chem data (only keep full matches - an inner join)
# Join on the three columns the tables share; naming them explicitly avoids
# relying on (and being told about) the automatic natural-join key detection.
data.bio %>% inner_join(data.chem, by = c("Between", "Plot", "Subplot"))
## Resp1 Resp2 Between Plot Subplot Chem1 Chem2 ## 1 8 18 A1 P1 S1 1.452878 0.8858208 ## 2 10 21 A1 P1 S2 3.266253 0.1800177 ## 3 11 23 A1 P2 S4 13.400350 1.5762780 ## 4 14 22 A2 P3 S5 3.779183 1.6222430 ## 5 12 24 A2 P3 S6 1.196657 4.2369184 ## 6 9 20 A2 P4 S8 5.687807 2.9859003 ## 7 14 11 A3 P5 S9 4.834518 4.1328919 ## 8 11 22 A3 P5 S10 2.002931 3.6043314 ## 9 8 24 A3 P6 S11 12.326867 1.7763576 ## 10 2 16 A3 P6 S12 4.014221 0.2255188
Merge bio and chem data (keep all data - outer join)
# Join on the three columns the tables share; naming them explicitly avoids
# relying on (and being told about) the automatic natural-join key detection.
data.bio %>% full_join(data.chem, by = c("Between", "Plot", "Subplot"))
## Resp1 Resp2 Between Plot Subplot Chem1 Chem2 ## 1 8 18 A1 P1 S1 1.452878 0.8858208 ## 2 10 21 A1 P1 S2 3.266253 0.1800177 ## 3 11 23 A1 P2 S4 13.400350 1.5762780 ## 4 14 22 A2 P3 S5 3.779183 1.6222430 ## 5 12 24 A2 P3 S6 1.196657 4.2369184 ## 6 11 23 A2 P4 S7 NA NA ## 7 9 20 A2 P4 S8 5.687807 2.9859003 ## 8 14 11 A3 P5 S9 4.834518 4.1328919 ## 9 11 22 A3 P5 S10 2.002931 3.6043314 ## 10 8 24 A3 P6 S11 12.326867 1.7763576 ## 11 2 16 A3 P6 S12 4.014221 0.2255188 ## 12 NA NA A1 P2 S3 1.178652 5.0780682
Merge bio and chem data (keep all BIO rows, adding CHEM values where they match - left join)
# Join on the three columns the tables share; naming them explicitly avoids
# relying on (and being told about) the automatic natural-join key detection.
data.bio %>% left_join(data.chem, by = c("Between", "Plot", "Subplot"))
## Resp1 Resp2 Between Plot Subplot Chem1 Chem2 ## 1 8 18 A1 P1 S1 1.452878 0.8858208 ## 2 10 21 A1 P1 S2 3.266253 0.1800177 ## 3 11 23 A1 P2 S4 13.400350 1.5762780 ## 4 14 22 A2 P3 S5 3.779183 1.6222430 ## 5 12 24 A2 P3 S6 1.196657 4.2369184 ## 6 11 23 A2 P4 S7 NA NA ## 7 9 20 A2 P4 S8 5.687807 2.9859003 ## 8 14 11 A3 P5 S9 4.834518 4.1328919 ## 9 11 22 A3 P5 S10 2.002931 3.6043314 ## 10 8 24 A3 P6 S11 12.326867 1.7763576 ## 11 2 16 A3 P6 S12 4.014221 0.2255188
Merge bio and chem data (keep all CHEM rows, adding BIO values where they match - right join)
# Join on the three columns the tables share; naming them explicitly avoids
# relying on (and being told about) the automatic natural-join key detection.
data.bio %>% right_join(data.chem, by = c("Between", "Plot", "Subplot"))
## Resp1 Resp2 Between Plot Subplot Chem1 Chem2 ## 1 8 18 A1 P1 S1 1.452878 0.8858208 ## 2 10 21 A1 P1 S2 3.266253 0.1800177 ## 3 11 23 A1 P2 S4 13.400350 1.5762780 ## 4 14 22 A2 P3 S5 3.779183 1.6222430 ## 5 12 24 A2 P3 S6 1.196657 4.2369184 ## 6 9 20 A2 P4 S8 5.687807 2.9859003 ## 7 14 11 A3 P5 S9 4.834518 4.1328919 ## 8 11 22 A3 P5 S10 2.002931 3.6043314 ## 9 8 24 A3 P6 S11 12.326867 1.7763576 ## 10 2 16 A3 P6 S12 4.014221 0.2255188 ## 11 NA NA A1 P2 S3 1.178652 5.0780682
Biological data set (data.bio)
## Resp1 Resp2 Between Plot Subplot ## 1 8 18 A1 P1 S1 ## 2 10 21 A1 P1 S2 ## 4 11 23 A1 P2 S4 ## 5 14 22 A2 P3 S5 ## 6 12 24 A2 P3 S6 ## 7 11 23 A2 P4 S7 ## 8 9 20 A2 P4 S8 ## 9 14 11 A3 P5 S9 ## 10 11 22 A3 P5 S10 ## 11 8 24 A3 P6 S11 ## 12 2 16 A3 P6 S12
Geographical data set (lookup table) (data.geo)
## Plot LAT LONG ## 1 P1 17.9605 145.4326 ## 2 P2 17.5210 146.1983 ## 3 P3 17.0011 146.3839 ## 4 P4 18.2350 146.7934 ## 5 P5 18.9840 146.0345 ## 6 P6 20.1154 146.4672
Incorporate (merge) the lat/longs into the bio data
data.bio %>% left_join(data.geo,by=c("Plot"))
## Resp1 Resp2 Between Plot Subplot LAT LONG ## 1 8 18 A1 P1 S1 17.9605 145.4326 ## 2 10 21 A1 P1 S2 17.9605 145.4326 ## 3 11 23 A1 P2 S4 17.5210 146.1983 ## 4 14 22 A2 P3 S5 17.0011 146.3839 ## 5 12 24 A2 P3 S6 17.0011 146.3839 ## 6 11 23 A2 P4 S7 18.2350 146.7934 ## 7 9 20 A2 P4 S8 18.2350 146.7934 ## 8 14 11 A3 P5 S9 18.9840 146.0345 ## 9 11 22 A3 P5 S10 18.9840 146.0345 ## 10 8 24 A3 P6 S11 20.1154 146.4672 ## 11 2 16 A3 P6 S12 20.1154 146.4672
## Psammocora contigua Psammocora digitata time rep ## V1 0 0 81 1 ## V2 0 0 81 2 ## V3 0 0 81 3 ## V4 0 0 81 4 ## V5 0 0 81 5 ## V6 0 0 81 6 ## V7 0 0 81 7 ## V8 0 0 81 8 ## V9 0 0 81 9 ## V10 0 0 81 10
## Rows: 60 ## Columns: 77 ## $ `Psammocora contigua` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Psammocora digitata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Pocillopora damicornis` <int> 79, 51, 42, 15, 9, 72, 0, 16, 0, 1… ## $ `Pocillopora verrucosa` <int> 32, 21, 35, 0, 0, 0, 41, 25, 38, 0… ## $ `Stylopora pistillata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Acropora bruegemanni` <int> 0, 44, 0, 11, 9, 10, 0, 0, 0, 37, … ## $ `Acropora robusta` <int> 0, 35, 40, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ `Acropora grandis` <int> 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 0, … ## $ `Acropora intermedia` <int> 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Acropora formosa` <int> 75, 0, 15, 0, 125, 0, 0, 0, 10, 0,… ## $ `Acropora splendida` <int> 0, 22, 0, 31, 0, 9, 16, 0, 0, 20, … ## $ `Acropera aspera` <int> 17, 18, 9, 8, 23, 0, 17, 13, 16, 1… ## $ `Acropora hyacinthus` <int> 141, 34, 55, 54, 0, 0, 0, 0, 0, 0,… ## $ `Acropora palifera` <int> 32, 0, 44, 0, 17, 0, 0, 0, 0, 0, 0… ## $ `Acropora cytherea` <int> 108, 33, 14, 122, 0, 0, 0, 8, 0, 0… ## $ `Acropora tenuis` <int> 0, 25, 0, 0, 0, 22, 28, 0, 0, 0, 0… ## $ `Acropora pulchra` <int> 0, 0, 15, 52, 62, 33, 0, 0, 24, 0,… ## $ `Acropora nasuta` <int> 43, 21, 19, 0, 0, 0, 10, 0, 0, 0, … ## $ `Acropora humilis` <int> 31, 25, 0, 19, 0, 0, 0, 0, 0, 0, 0… ## $ `Acropora diversa` <int> 22, 19, 20, 13, 23, 14, 0, 12, 12,… ## $ `Acropora digitifera` <int> 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Acropora divaricata` <int> 0, 32, 55, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ `Acropora subglabra` <int> 51, 0, 0, 44, 15, 0, 0, 25, 0, 0, … ## $ `Acropora cerealis` <int> 0, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Acropora valida` <int> 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, … ## $ `Acropora acuminata` <int> 20, 0, 71, 0, 15, 0, 25, 25, 0, 0,… ## $ `Acropora elsevi` <int> 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Acropora millepora` <int> 17, 14, 0, 20, 0, 0, 0, 0, 0, 0, 0… ## $ `Montipora monasteriata` <int> 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ 
`Montipora tuberculosa` <int> 0, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ `Montipora hispida` <int> 0, 0, 0, 32, 40, 24, 0, 0, 0, 0, 0… ## $ `Montipora digitata` <int> 0, 0, 0, 0, 0, 77, 84, 53, 71, 351… ## $ `Montipora foliosa` <int> 0, 0, 0, 0, 50, 71, 62, 81, 24, 0,… ## $ `Montipora verrucosa` <int> 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Fungia fungites` <int> 0, 0, 18, 17, 0, 0, 0, 0, 0, 0, 0,… ## $ `Fungia paumotensis` <int> 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 12,… ## $ `Fungia concina` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1… ## $ `Fungia scutaria` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Halomitra limax` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Pavona varians` <int> 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,… ## $ `Pavona venosa` <int> 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Pavona cactus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Coeloseris mayeri` <int> 20, 0, 15, 0, 9, 19, 0, 0, 25, 0, … ## $ `Galaxea fascicularis` <int> 51, 27, 31, 24, 0, 13, 0, 0, 0, 0,… ## $ `Symphyllia radians` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Lobophyllia corymbosa` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1… ## $ `Lobophyllia hemprichii` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Porites cylindrica` <int> 61, 24, 0, 20, 0, 0, 0, 0, 0, 0, 1… ## $ `Porites lichen` <int> 0, 47, 49, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ `Porites lobata` <int> 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Porites lutea` <int> 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Porites nigrescens` <int> 0, 0, 0, 21, 0, 9, 25, 0, 45, 26, … ## $ `Porites solida` <int> 0, 0, 10, 0, 17, 0, 31, 41, 0, 0, … ## $ `Porites stephensoni` <int> 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, … ## $ `Goniopora lobata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Favia pallida` <int> 10, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ `Favia speciosa` <int> 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Favia stelligera` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Favia rotumana` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Favites 
abdita` <int> 33, 41, 23, 27, 91, 63, 72, 48, 71… ## $ `Favites chinensis` <int> 0, 44, 78, 61, 44, 0, 55, 30, 30, … ## $ `Goniastrea rectiformis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0… ## $ `Goniastrea pectinata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Goniastrea sp` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Dulophyllia crispa` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9… ## $ `Platygyra daedalea` <int> 0, 27, 55, 0, 71, 74, 55, 48, 0, 0… ## $ `Platygyra sinensis` <int> 47, 27, 56, 26, 0, 0, 0, 0, 0, 0, … ## $ `Hydnopora rigida` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Leptastrea purpurea` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Leptastrea pruinosa` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… ## $ `Cyphastrea serailia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, … ## $ `Millepora platyphylla` <int> 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Millepora dichotoma` <int> 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Millepora intrincata` <int> 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ `Heliopora coerulea` <int> 461, 271, 221, 154, 0, 0, 0, 0, 0,… ## $ time <fct> 81, 81, 81, 81, 81, 81, 81, 81, 81… ## $ rep <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, …
Explore/Process data
Explore/Process data
Step 1. fix typo (rename) - backticks
tikus %>% rename(`Acropora aspera`=`Acropera aspera`)
Explore/Process data
Step 2. melt data (gather)
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
gather(Species, Abundance,-time,-rep)
Explore/Process data
Step 3. Calculate Cover (mutate) (Abundance/10)
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
gather(Species, Abundance,-time,-rep) %>%
mutate(Cover=Abundance/10)
Explore/Process data
Step 4. Split species into Genera and Species (separate)
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
gather(Species, Abundance,-time,-rep) %>%
mutate(Cover=Abundance/10) %>%
separate(Species,c('Genera','Species'))
Explore/Process data
Step 5. Subset just ‘Acropora’ (filter)
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
gather(Species, Abundance,-time,-rep) %>%
mutate(Cover=Abundance/10) %>%
separate(Species,c('Genera','Species')) %>%
filter(Genera=='Acropora')
Explore/Process data
Step 6. Sum over all Species (group_by and summarise)
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
gather(Species, Abundance,-time,-rep) %>%
mutate(Cover=Abundance/10) %>%
separate(Species,c('Genera','Species')) %>%
filter(Genera=='Acropora') %>%
group_by(time,rep) %>%
summarise(SumCover=sum(Cover))
Explore/Process data
Step 7. Summarise per year
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
gather(Species, Abundance,-time,-rep) %>%
mutate(Cover=Abundance/10) %>%
separate(Species,c('Genera','Species')) %>%
filter(Genera=='Acropora') %>%
group_by(time,rep) %>%
summarise(SumCover=sum(Cover)) %>%
group_by(time) %>%
summarise(Mean=mean(SumCover),
Var=var(SumCover))
## # A tibble: 6 x 3 ## time Mean Var ## <fct> <dbl> <dbl> ## 1 81 25.6 383. ## 2 83 0 0 ## 3 84 0 0 ## 4 85 2.43 14.2 ## 5 87 8.01 68.5 ## 6 88 8.55 106.
# Mean and variance of total Acropora cover per year (time).
# Step 1: correct the misspelt column name `Acropera aspera`.
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
# Step 2: stack every column except time and rep into Species/Abundance pairs.
gather(Species, Abundance,-time,-rep) %>%
# Step 3: derive Cover as Abundance divided by 10.
mutate(Cover=Abundance/10) %>%
# Step 4: split the Species name into Genera and Species (default separator).
separate(Species,c('Genera','Species')) %>%
# Step 5: keep only the Acropora rows.
filter(Genera=='Acropora') %>%
# Step 6: total the cover within each time/rep combination.
group_by(time,rep) %>%
summarise(SumCover=sum(Cover)) %>%
# Step 7: mean and variance of those totals within each time.
group_by(time) %>%
summarise(Mean=mean(SumCover),
Var=var(SumCover))
## # A tibble: 6 x 3 ## time Mean Var ## <fct> <dbl> <dbl> ## 1 81 25.6 383. ## 2 83 0 0 ## 3 84 0 0 ## 4 85 2.43 14.2 ## 5 87 8.01 68.5 ## 6 88 8.55 106.
Can you modify so that we get the means and var for each Genera per year?
# Mean and variance of total cover for every Genera in every year (time).
# Same pipeline as the Acropora example, but without the filter() step and
# with Genera added to both groupings.
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
# Stack every column except time and rep into Species/Abundance pairs.
gather(Species, Abundance,-time,-rep) %>%
mutate(Cover=Abundance/10) %>%
# Split the Species name into Genera and Species (default separator).
separate(Species,c('Genera','Species')) %>%
# Total the cover of each Genera within each time/rep combination.
group_by(time,rep,Genera) %>%
summarise(SumCover=sum(Cover)) %>%
# Mean and variance of those totals per time and Genera; the printed result
# is still grouped by time.
group_by(time,Genera) %>%
summarise(Mean=mean(SumCover),
Var=var(SumCover))
## # A tibble: 144 x 4 ## # Groups: time [6] ## time Genera Mean Var ## <fct> <chr> <dbl> <dbl> ## 1 81 Acropora 25.6 383. ## 2 81 Coeloseris 0.88 1.02 ## 3 81 Cyphastrea 0 0 ## 4 81 Dulophyllia 0 0 ## 5 81 Favia 0.6 1.16 ## 6 81 Favites 8.22 14.9 ## 7 81 Fungia 0.68 1.38 ## 8 81 Galaxea 1.46 3.23 ## 9 81 Goniastrea 0 0 ## 10 81 Goniopora 0 0 ## # … with 134 more rows
What about the means and var for the top 3 Genera per year (sorted from highest to lowest)?
# Mean and variance of total cover for the three Genera with the highest mean
# cover in each year (time), sorted from highest to lowest mean.
tikus %>% rename(`Acropora aspera`=`Acropera aspera`) %>%
# Stack every column except time and rep into Species/Abundance pairs.
gather(Species, Abundance,-time,-rep) %>%
mutate(Cover=Abundance/10) %>%
# Split the Species name into Genera and Species (default separator).
separate(Species,c('Genera','Species')) %>%
# Total the cover of each Genera within each time/rep combination.
group_by(time,rep,Genera) %>%
summarise(SumCover=sum(Cover)) %>%
# Mean and variance of those totals per time and Genera.
group_by(time,Genera) %>%
summarise(Mean=mean(SumCover),
Var=var(SumCover)) %>%
# The summary is still grouped by time, so this keeps the 3 largest means
# within each time.
top_n(3,Mean) %>%
# Sort all retained rows by descending Mean; in the printed output the order
# runs across years rather than within each year.
arrange(desc(Mean))
## # A tibble: 18 x 4 ## # Groups: time [6] ## time Genera Mean Var ## <fct> <chr> <dbl> <dbl> ## 1 87 Montipora 27.4 966. ## 2 81 Acropora 25.6 383. ## 3 85 Montipora 20.5 171. ## 4 85 Porites 19.0 51.3 ## 5 88 Montipora 11.8 644. ## 6 81 Montipora 11.4 95.7 ## 7 81 Heliopora 11.1 262. ## 8 84 Montipora 11.0 70.5 ## 9 88 Porites 9.84 41.4 ## 10 88 Acropora 8.55 106. ## 11 87 Acropora 8.01 68.5 ## 12 87 Porites 4.49 35.8 ## 13 84 Porites 2.94 6.65 ## 14 85 Platygyra 2.55 8.74 ## 15 83 Porites 1.74 2.07 ## 16 84 Pavona 1.2 3.33 ## 17 83 Fungia 1.14 3.64 ## 18 83 Montipora 0.93 1.57